
# Setup -------------------------------------------------------------------
library(spikes);library(tidyverse)


# Data --------------------------------------------------------------------
load("Forensics_data.rdata")


## Functions ---------------------------------------------------------------
get_plotframe_spikes <- function(spikesout){
  
  d <- function(z) density(z, from = 0, to = 1, cut = TRUE, 
                           n = spikesout$grid, bw = spikesout$bw, kernel = "gaussian")
  ydens <- d(spikesout$data$v/spikesout$data$t)
  plotframe <- tibble("vote_share_bin" = spikesout$x,
                      "kernel_density_upper_bound" = spikesout$ymax,
                      "observed_vote_shares" = ydens$y)
  
  observed_larger_than_density <- which(ydens$y >= spikesout$ymax)
  observed_larger_than_density <- observed_larger_than_density[spikesout$w[observed_larger_than_density] != 0]
  fraud_points <- spikesout$x[observed_larger_than_density]
  
  res <- list("plotframe" = plotframe,
              "fraud_points" = fraud_points)
  return(res)
  
}

get_fraudulent_result <- function(from_spikes){
  d <- function(z) density(z, from = 0, to = 1, cut = TRUE, 
                           n = from_spikes$grid, bw = from_spikes$bw, kernel = "gaussian")
  ydens <- d(from_spikes$data$v/from_spikes$data$t)
  w <- which(ydens$y >= from_spikes$ymax)
  w <- w[from_spikes$w[w] != 0]
  point <- from_spikes$x[w]
  wid <- 6 * from_spikes$w[w]/max(from_spikes$w[w])
  
  return(list(point, wid))
  
}


# RKD ---------------------------------------------------------------------


## PF ----------------------------------------------------------------------
set.seed(1964)
pf_tmp <- rkd[which(rkd$party=="PF"),]
spikedat <- pf_tmp[, c("voters", "sum_of_votes", "votes", "valid_votes")]
spikedat <- spikedat[which(spikedat$voters>spikedat$sum_of_votes),]
spikedat <- spikedat[which(spikedat$sum_of_votes>spikedat$votes),]
spikedat$valid_votes <- NULL
colnames(spikedat) <- c("N", "t", "v")
pf_spikes <- spikes::spikes(spikedat)
pf_plotframe <- get_plotframe_spikes(pf_spikes)
pf_rkdplot <- pf_plotframe$plotframe |> pivot_longer(kernel_density_upper_bound:observed_vote_shares, names_to = "stat",
                                                     values_to = "values") |>
  mutate(stat = ifelse(stat == "kernel_density_upper_bound", "RKD upper bound", "Observed in data")) |>
  ggplot(aes(x = vote_share_bin, y = values, color = stat)) + 
  geom_vline(xintercept = pf_plotframe$fraud_points, linetype = 2, color ="red") +
  geom_line() +
  xlab("Vote share") + ylab("Density") +
  scale_color_manual(values = c("black", "grey50")) +
  theme_classic() +
  ggtitle("PF") +
  theme(legend.title = element_blank())
ggsave(pf_rkdplot, filename = "FigureB13.jpg",
       width = 12, height = 8, scale = 0.6)


## UPND ----------------------------------------------------------------------
set.seed(1964)
upnd_tmp <- rkd[which(rkd$party=="UPND"),]
spikedat <- upnd_tmp[, c("voters", "sum_of_votes", "votes", "valid_votes")]
spikedat <- spikedat[which(spikedat$voters>spikedat$sum_of_votes),]
spikedat <- spikedat[which(spikedat$sum_of_votes>spikedat$votes),]
spikedat$valid_votes <- NULL
colnames(spikedat) <- c("N", "t", "v")
upnd_spikes <- spikes::spikes(spikedat, grid = 1001)
upnd_plotframe <- get_plotframe_spikes(upnd_spikes)
upnd_rkdplot <- upnd_plotframe$plotframe |> pivot_longer(kernel_density_upper_bound:observed_vote_shares, names_to = "stat",
                                                         values_to = "values") |>
  mutate(stat = ifelse(stat == "kernel_density_upper_bound", "RKD upper bound", "Observed in data")) |>
  ggplot(aes(x = vote_share_bin, y = values, color = stat)) + 
  geom_vline(xintercept = upnd_plotframe$fraud_points, linetype = 2, color ="red") +
  geom_line() +
  xlab("Vote share") + ylab("Density") +
  scale_color_manual(values = c("black", "grey50")) +
  theme_classic() +
  ggtitle("UPND") +
  theme(legend.title = element_blank())
ggsave(upnd_rkdplot, filename = "FigureB14.jpg",
       width = 12, height = 8, scale = 0.6)




## List flagged constituencies ---------------------------------------------
upnd_fraud <- tibble(party = "UPND", "ranges" = gsub("\\]", "", gsub("\\(", "", names(get_fraudulent_result(upnd_spikes)[[2]]) ))) |> separate_wider_delim(cols = ranges, delim = ",", names = c("start", "stop"))
pf_fraud   <- tibble(party = "PF", "ranges" = gsub("\\]", "", gsub("\\(", "", names(get_fraudulent_result(pf_spikes)[[2]]) ))) |> separate_wider_delim(cols = ranges, delim = ",", names = c("start", "stop"))
fraud_ranges <- bind_rows(upnd_fraud, pf_fraud)
rkd$row_id <- 1:nrow(rkd)
tmp <- left_join(rkd[, c("row_id", "party", "votes", "sum_of_votes")], fraud_ranges)
tmp$pct_votes <- tmp$votes/tmp$sum_of_votes
tmp$flag <- ifelse(tmp$pct_votes >= tmp$start & tmp$pct_votes<=tmp$stop, 1, 0)
tmp <- tmp |> subset(party %in% c("PF", "UPND")) |>
  group_by(row_id, party) |> summarise(flag = max(flag))
tmp <- tmp |> pivot_wider(names_from = "party", values_from = flag, names_prefix = "rkd_flag_", values_fill = 0)
tmp$rkd_flag_any <- ifelse((tmp$rkd_flag_UPND + tmp$rkd_flag_PF) > 0, 1, 0)
rkd <- left_join(rkd, tmp)
rm(tmp)
rkd$rkd_flag_any[which(is.na(rkd$rkd_flag_any)==TRUE)] <- 0
rkd$rkd_flag_PF[which(is.na(rkd$rkd_flag_PF)==TRUE)] <- 0
rkd$rkd_flag_UPND[which(is.na(rkd$rkd_flag_UPND)==TRUE)] <- 0
rkd_constituencies <- rkd |> 
  group_by(constituency_name) |>
  summarise("rkd_flag_any" = max(rkd_flag_any),
            "rkd_flag_PF" = max(rkd_flag_PF),
            "rkd_flag_UPND" = max(rkd_flag_UPND))
table(rkd_constituencies$rkd_flag_PF)


# RKD Presidential --------------------------------------------------------

## 2011 --------------------------------------------------------------------
pres_2011 <- pres[which(pres$election_year==2011),]
pres_2011 <- pres_2011[pres_2011$party %in% c("MMD", "PF"),]

### MMD --------------------------------------------------------------------
rkd_mmd_pres_2011 <- pres_2011[which(pres_2011$party=="MMD"), c("registered_voters", "sum_of_votes", "votes")]
colnames(rkd_mmd_pres_2011) <- c("N", "t", "v")

set.seed(1964)
mmd_pres_2011_spikes <- spikes::spikes(rkd_mmd_pres_2011)
mmd_pres_2011_plotframe <- get_plotframe_spikes(mmd_pres_2011_spikes)
mmd_pres_2011_rkdplot <- mmd_pres_2011_plotframe$plotframe |>
  pivot_longer(kernel_density_upper_bound:observed_vote_shares, names_to = "stat",
               values_to = "values") |>
  mutate(stat = ifelse(stat == "kernel_density_upper_bound", "RKD upper bound", "Observed in data")) |>
  ggplot(aes(x = vote_share_bin, y = values, color = stat)) + 
  geom_vline(xintercept = mmd_pres_2011_plotframe$fraud_points, linetype = 2, color ="red") +
  geom_line() +
  xlab("Vote share") + ylab("Density") +
  scale_color_manual(values = c("black", "grey50")) +
  theme_classic() +
  ggtitle("Presidential election 2011: MMD") +
  theme(legend.title = element_blank())
ggsave(mmd_pres_2011_rkdplot, filename = "FigureB2.jpg",
       width = 12, height = 8, scale = 0.6)



### PF --------------------------------------------------------------------
rkd_pf_pres_2011 <- pres_2011[which(pres_2011$party=="PF"), c("registered_voters", "sum_of_votes", "votes")]
colnames(rkd_pf_pres_2011) <- c("N", "t", "v")

set.seed(1964)
pf_pres_2011_spikes <- spikes::spikes(rkd_pf_pres_2011)
pf_pres_2011_plotframe <- get_plotframe_spikes(pf_pres_2011_spikes)
pf_pres_2011_rkdplot <- pf_pres_2011_plotframe$plotframe |>
  pivot_longer(kernel_density_upper_bound:observed_vote_shares, names_to = "stat",
               values_to = "values") |>
  mutate(stat = ifelse(stat == "kernel_density_upper_bound", "RKD upper bound", "Observed in data")) |>
  ggplot(aes(x = vote_share_bin, y = values, color = stat)) + 
  geom_vline(xintercept = pf_pres_2011_plotframe$fraud_points, linetype = 2, color ="red") +
  geom_line() +
  xlab("Vote share") + ylab("Density") +
  scale_color_manual(values = c("black", "grey50")) +
  theme_classic() +
  ggtitle("Presidential election 2011: PF") +
  theme(legend.title = element_blank())
ggsave(pf_pres_2011_rkdplot, filename = "FigureB4.jpg",
       width = 12, height = 8, scale = 0.6)



## 2016 --------------------------------------------------------------------
pres_2016 <- pres[which(pres$election_year==2016),]
pres_2016 <- pres_2016[pres_2016$party %in% c("UPND", "PF"),]


### UPND --------------------------------------------------------------------
rkd_upnd_pres_2016 <- pres_2016[which(pres_2016$party=="UPND"), c("registered_voters", "sum_of_votes", "votes")]
colnames(rkd_upnd_pres_2016) <- c("N", "t", "v")

set.seed(1963)
upnd_pres_2016_spikes <- spikes::spikes(rkd_upnd_pres_2016)
upnd_pres_2016_plotframe <- get_plotframe_spikes(upnd_pres_2016_spikes)
upnd_pres_2016_rkdplot <- upnd_pres_2016_plotframe$plotframe |>
  pivot_longer(kernel_density_upper_bound:observed_vote_shares, names_to = "stat",
               values_to = "values") |>
  mutate(stat = ifelse(stat == "kernel_density_upper_bound", "RKD upper bound", "Observed in data")) |>
  ggplot(aes(x = vote_share_bin, y = values, color = stat)) + 
  geom_vline(xintercept = upnd_pres_2016_plotframe$fraud_points, linetype = 2, color ="red") +
  geom_line() +
  xlab("Vote share") + ylab("Density") +
  scale_color_manual(values = c("black", "grey50")) +
  theme_classic() +
  ggtitle("Presidential election 2016: UPND") +
  theme(legend.title = element_blank())
upnd_pres_2016_rkdplot
ggsave(upnd_pres_2016_rkdplot, filename = "FigureB6.jpg",
       width = 12, height = 8, scale = 0.6)


### PF --------------------------------------------------------------------
rkd_pf_pres_2016 <- pres_2016[which(pres_2016$party=="PF"), c("registered_voters", "sum_of_votes", "votes")]
colnames(rkd_pf_pres_2016) <- c("N", "t", "v")

set.seed(1964)
pf_pres_2016_spikes <- spikes::spikes(rkd_pf_pres_2016)
pf_pres_2016_plotframe <- get_plotframe_spikes(pf_pres_2016_spikes)
pf_pres_2016_rkdplot <- pf_pres_2016_plotframe$plotframe |>
  pivot_longer(kernel_density_upper_bound:observed_vote_shares, names_to = "stat",
               values_to = "values") |>
  mutate(stat = ifelse(stat == "kernel_density_upper_bound", "RKD upper bound", "Observed in data")) |>
  ggplot(aes(x = vote_share_bin, y = values, color = stat)) + 
  geom_vline(xintercept = pf_pres_2016_plotframe$fraud_points, linetype = 2, color ="red") +
  geom_line() +
  xlab("Vote share") + ylab("Density") +
  scale_color_manual(values = c("black", "grey50")) +
  theme_classic() +
  ggtitle("Presidential election 2016: PF") +
  theme(legend.title = element_blank())
pf_pres_2016_rkdplot
ggsave(pf_pres_2016_rkdplot, filename = "FigureB8.jpg",
       width = 12, height = 8, scale = 0.6)



## 2021 --------------------------------------------------------------------
pres_2021 <- pres[which(pres$election_year==2021),]
pres_2021 <- pres_2021[pres_2021$party %in% c("UPND", "PF"),]

### UPND --------------------------------------------------------------------
rkd_upnd_pres_2021 <- pres_2021[which(pres_2021$party=="UPND"), c("registered_voters", "sum_of_votes", "votes")]
colnames(rkd_upnd_pres_2021) <- c("N", "t", "v")

set.seed(1965) #Error for 1964
upnd_pres_2021_spikes <- spikes::spikes(rkd_upnd_pres_2021)
upnd_pres_2021_plotframe <- get_plotframe_spikes(upnd_pres_2021_spikes)
upnd_pres_2021_rkdplot <- upnd_pres_2021_plotframe$plotframe |>
  pivot_longer(kernel_density_upper_bound:observed_vote_shares, names_to = "stat",
               values_to = "values") |>
  mutate(stat = ifelse(stat == "kernel_density_upper_bound", "RKD upper bound", "Observed in data")) |>
  ggplot(aes(x = vote_share_bin, y = values, color = stat)) + 
  geom_vline(xintercept = upnd_pres_2021_plotframe$fraud_points, linetype = 2, color ="red") +
  geom_line() +
  xlab("Vote share") + ylab("Density") +
  scale_color_manual(values = c("black", "grey50")) +
  theme_classic() +
  ggtitle("Presidential election 2021: UPND") +
  theme(legend.title = element_blank())
ggsave(upnd_pres_2021_rkdplot, filename = "FigureB10.jpg",
       width = 12, height = 8, scale = 0.6)


### PF --------------------------------------------------------------------
rkd_pf_pres_2021 <- pres_2021[which(pres_2021$party=="PF"), c("registered_voters", "sum_of_votes", "votes")]
colnames(rkd_pf_pres_2021) <- c("N", "t", "v")

set.seed(1965)  #Error for 1964
pf_pres_2021_spikes <- spikes::spikes(rkd_pf_pres_2021, grid = 200)
pf_pres_2021_plotframe <- get_plotframe_spikes(pf_pres_2021_spikes)
pf_pres_2021_rkdplot <- pf_pres_2021_plotframe$plotframe |>
  pivot_longer(kernel_density_upper_bound:observed_vote_shares, names_to = "stat",
               values_to = "values") |>
  mutate(stat = ifelse(stat == "kernel_density_upper_bound", "RKD upper bound", "Observed in data")) |>
  ggplot(aes(x = vote_share_bin, y = values, color = stat)) + 
  geom_vline(xintercept = pf_pres_2021_plotframe$fraud_points, linetype = 2, color ="red") +
  geom_line() +
  xlab("Vote share") + ylab("Density") +
  scale_color_manual(values = c("black", "grey50")) +
  theme_classic() +
  ggtitle("Presidential election 2021: PF") +
  theme(legend.title = element_blank())
pf_pres_2021_rkdplot
ggsave(pf_pres_2021_rkdplot, filename = "FigureB12.jpg",
       width = 12, height = 8, scale = 0.6)



# RKD Parliamentary --------------------------------------------------------

## 2011 --------------------------------------------------------------------
parl_2011 <- parl[which(parl$year == 2011 & parl$party_candidate %in% c("mmd", "pf")), ]

### MMD --------------------------------------------------------------------
rkd_mmd_parl_2011 <- na.omit(parl_2011[which(parl_2011$party_candidate=="mmd"), c("regist", "sum_of_votes", "votes")])
colnames(rkd_mmd_parl_2011) <- c("N", "t", "v")

set.seed(1964)
mmd_parl_2011_spikes <- spikes::spikes(rkd_mmd_parl_2011)
mmd_parl_2011_plotframe <- get_plotframe_spikes(mmd_parl_2011_spikes)
mmd_parl_2011_rkdplot <- mmd_parl_2011_plotframe$plotframe |>
  pivot_longer(kernel_density_upper_bound:observed_vote_shares, names_to = "stat",
               values_to = "values") |>
  mutate(stat = ifelse(stat == "kernel_density_upper_bound", "RKD upper bound", "Observed in data")) |>
  ggplot(aes(x = vote_share_bin, y = values, color = stat)) + 
  geom_vline(xintercept = mmd_parl_2011_plotframe$fraud_points, linetype = 2, color ="red") +
  geom_line() +
  xlab("Vote share") + ylab("Density") +
  scale_color_manual(values = c("black", "grey50")) +
  theme_classic() +
  ggtitle("Parliamentary election 2011: MMD") +
  theme(legend.title = element_blank())
mmd_parl_2011_rkdplot
ggsave(mmd_parl_2011_rkdplot, filename = "FigureB1.jpg",
       width = 12, height = 8, scale = 0.6)



### PF --------------------------------------------------------------------
rkd_pf_parl_2011 <- na.omit(parl_2011[which(parl_2011$party_candidate=="pf"), c("regist", "sum_of_votes", "votes")])
colnames(rkd_pf_parl_2011) <- c("N", "t", "v")

pf_parl_2011_spikes <- spikes::spikes(rkd_pf_parl_2011)
pf_parl_2011_plotframe <- get_plotframe_spikes(pf_parl_2011_spikes)
pf_parl_2011_rkdplot <- pf_parl_2011_plotframe$plotframe |>
  pivot_longer(kernel_density_upper_bound:observed_vote_shares, names_to = "stat",
               values_to = "values") |>
  mutate(stat = ifelse(stat == "kernel_density_upper_bound", "RKD upper bound", "Observed in data")) |>
  ggplot(aes(x = vote_share_bin, y = values, color = stat)) + 
  geom_vline(xintercept = pf_parl_2011_plotframe$fraud_points, linetype = 2, color ="red") +
  geom_line() +
  xlab("Vote share") + ylab("Density") +
  scale_color_manual(values = c("black", "grey50")) +
  theme_classic() +
  ggtitle("Parliamentary election 2011: PF") +
  theme(legend.title = element_blank())
pf_parl_2011_rkdplot
ggsave(pf_parl_2011_rkdplot, filename = "FigureB3.jpg",
       width = 12, height = 8, scale = 0.6)


## 2016 --------------------------------------------------------------------
parl_2016 <- parl[which(parl$year==2016),]
parl_2016 <- parl_2016[parl_2016$party_candidate %in% c("upnd", "pf"),]

### UPND --------------------------------------------------------------------
rkd_upnd_parl_2016 <- na.omit(parl_2016[which(parl_2016$party_candidate=="upnd"), c("regist", "sum_of_votes", "votes")])
colnames(rkd_upnd_parl_2016) <- c("N", "t", "v")

set.seed(1964)
upnd_parl_2016_spikes <- spikes::spikes(rkd_upnd_parl_2016)
upnd_parl_2016_plotframe <- get_plotframe_spikes(upnd_parl_2016_spikes)
upnd_parl_2016_rkdplot <- upnd_parl_2016_plotframe$plotframe |>
  pivot_longer(kernel_density_upper_bound:observed_vote_shares, names_to = "stat",
               values_to = "values") |>
  mutate(stat = ifelse(stat == "kernel_density_upper_bound", "RKD upper bound", "Observed in data")) |>
  ggplot(aes(x = vote_share_bin, y = values, color = stat)) + 
  geom_vline(xintercept = upnd_parl_2016_plotframe$fraud_points, linetype = 2, color ="red") +
  geom_line() +
  xlab("Vote share") + ylab("Density") +
  scale_color_manual(values = c("black", "grey50")) +
  theme_classic() +
  ggtitle("Parliamentary election 2016: UPND") +
  theme(legend.title = element_blank())
upnd_parl_2016_rkdplot
ggsave(upnd_parl_2016_rkdplot, filename = "FigureB5.jpg",
       width = 12, height = 8, scale = 0.6)


### PF --------------------------------------------------------------------
rkd_pf_parl_2016 <- na.omit(parl_2016[which(parl_2016$party_candidate=="pf"), c("regist", "sum_of_votes", "votes")])
colnames(rkd_pf_parl_2016) <- c("N", "t", "v")

set.seed(1964)
pf_parl_2016_spikes <- spikes::spikes(rkd_pf_parl_2016)
pf_parl_2016_plotframe <- get_plotframe_spikes(pf_parl_2016_spikes)
pf_parl_2016_rkdplot <- pf_parl_2016_plotframe$plotframe |>
  pivot_longer(kernel_density_upper_bound:observed_vote_shares, names_to = "stat",
               values_to = "values") |>
  mutate(stat = ifelse(stat == "kernel_density_upper_bound", "RKD upper bound", "Observed in data")) |>
  ggplot(aes(x = vote_share_bin, y = values, color = stat)) + 
  geom_vline(xintercept = pf_parl_2016_plotframe$fraud_points, linetype = 2, color ="red") +
  geom_line() +
  xlab("Vote share") + ylab("Density") +
  scale_color_manual(values = c("black", "grey50")) +
  theme_classic() +
  ggtitle("Parliamentary election 2016: PF") +
  theme(legend.title = element_blank())
pf_parl_2016_rkdplot
ggsave(pf_parl_2016_rkdplot, filename = "FigureB7.jpg",
       width = 12, height = 8, scale = 0.6)

## 2021 --------------------------------------------------------------------
parl_2021 <- parl[which(parl$year==2021 & parl$party_candidate %in% c("upnd", "pf")),]

### UPND --------------------------------------------------------------------
rkd_upnd_parl_2021 <- na.omit(parl_2021[which(parl_2021$party_candidate=="upnd"), c("regist", "sum_of_votes", "votes")])
colnames(rkd_upnd_parl_2021) <- c("N", "t", "v")

set.seed(1965)
upnd_parl_2021_spikes <- spikes::spikes(rkd_upnd_parl_2021)
upnd_parl_2021_plotframe <- get_plotframe_spikes(upnd_parl_2021_spikes)
upnd_parl_2021_rkdplot <- upnd_parl_2021_plotframe$plotframe |>
  pivot_longer(kernel_density_upper_bound:observed_vote_shares, names_to = "stat",
               values_to = "values") |>
  mutate(stat = ifelse(stat == "kernel_density_upper_bound", "RKD upper bound", "Observed in data")) |>
  ggplot(aes(x = vote_share_bin, y = values, color = stat)) + 
  geom_vline(xintercept = upnd_parl_2021_plotframe$fraud_points, linetype = 2, color ="red") +
  geom_line() +
  xlab("Vote share") + ylab("Density") +
  scale_color_manual(values = c("black", "grey50")) +
  theme_classic() +
  ggtitle("Parliamentary election 2021: UPND") +
  theme(legend.title = element_blank())
upnd_parl_2021_rkdplot
ggsave(upnd_parl_2021_rkdplot, filename = "FigureB9.jpg",
       width = 12, height = 8, scale = 0.6)


### PF --------------------------------------------------------------------
rkd_pf_parl_2021 <- na.omit(parl_2021[which(parl_2021$party_candidate=="pf"), c("regist", "sum_of_votes", "votes")])
colnames(rkd_pf_parl_2021) <- c("N", "t", "v")

set.seed(1965)
pf_parl_2021_spikes <- spikes::spikes(rkd_pf_parl_2021, grid = 500)
pf_parl_2021_plotframe <- get_plotframe_spikes(pf_parl_2021_spikes)
pf_parl_2021_rkdplot <- pf_parl_2021_plotframe$plotframe |>
  pivot_longer(kernel_density_upper_bound:observed_vote_shares, names_to = "stat",
               values_to = "values") |>
  mutate(stat = ifelse(stat == "kernel_density_upper_bound", "RKD upper bound", "Observed in data")) |>
  ggplot(aes(x = vote_share_bin, y = values, color = stat)) + 
  geom_vline(xintercept = pf_parl_2021_plotframe$fraud_points, linetype = 2, color ="red") +
  geom_line() +
  xlab("Vote share") + ylab("Density") +
  scale_color_manual(values = c("black", "grey50")) +
  theme_classic() +
  ggtitle("Parliamentary election 2021: PF") +
  theme(legend.title = element_blank())
pf_parl_2021_rkdplot
ggsave(pf_parl_2021_rkdplot, filename = "FigureB11.jpg",
       width = 12, height = 8, scale = 0.6)


# Tag red-flag RKD constituencies --------------------------------------------------

## Polling stations ------------------------------------------------------------
upnd_fraud <- tibble(party = "UPND", "ranges" = gsub("\\]", "", gsub("\\(", "", names(get_fraudulent_result(upnd_spikes)[[2]]) ))) |> separate_wider_delim(cols = ranges, delim = ",", names = c("start", "stop"))
pf_fraud   <- tibble(party = "PF", "ranges" = gsub("\\]", "", gsub("\\(", "", names(get_fraudulent_result(pf_spikes)[[2]]) ))) |> separate_wider_delim(cols = ranges, delim = ",", names = c("start", "stop"))
fraud_ranges <- bind_rows(upnd_fraud, pf_fraud)
rkd$row_id <- 1:nrow(rkd)
tmp <- left_join(rkd[, c("row_id", "party", "votes", "sum_of_votes")], fraud_ranges)
tmp$pct_votes <- tmp$votes/tmp$sum_of_votes
tmp$flag <- ifelse(tmp$pct_votes >= tmp$start & tmp$pct_votes<=tmp$stop, 1, 0)
tmp <- tmp |> subset(party %in% c("PF", "UPND")) |>
  group_by(row_id, party) |> summarise(flag = max(flag))
tmp <- tmp |> pivot_wider(names_from = "party", values_from = flag, names_prefix = "rkd_flag_", values_fill = 0)
tmp$rkd_flag_any <- ifelse((tmp$rkd_flag_UPND + tmp$rkd_flag_PF) > 0, 1, 0)
rkd <- left_join(rkd, tmp)
rm(tmp)

rkd$rkd_flag_any[which(is.na(rkd$rkd_flag_any)==TRUE)] <- 0
rkd$rkd_flag_PF[which(is.na(rkd$rkd_flag_PF)==TRUE)] <- 0
rkd$rkd_flag_UPND[which(is.na(rkd$rkd_flag_UPND)==TRUE)] <- 0
rkd_constituencies <- rkd |> 
  group_by(constituency_name) |>
  summarise("rkd_flag_any" = max(rkd_flag_any),
            "rkd_flag_PF" = max(rkd_flag_PF),
            "rkd_flag_UPND" = max(rkd_flag_UPND))


## Constituencies ------------------------------------------------------------
mmd_pres_fraud <- tibble(election_year = 2011,
                         party = "MMD",
                         "ranges" = gsub("\\]", "", gsub("\\(", "", names(get_fraudulent_result(mmd_pres_2011_spikes)[[2]]) ))) |>
  separate_wider_delim(cols = ranges, delim = ",", names = c("start", "stop"))
pres_2011$row_id <- 1:nrow(pres_2011)
tmp <- left_join(pres_2011[, c("row_id", "election_year", "party", "votes", "sum_of_votes")],
                 mmd_pres_fraud)
tmp$pct_votes <- tmp$votes/tmp$sum_of_votes
tmp$flag <- ifelse(tmp$pct_votes >= tmp$start & tmp$pct_votes<=tmp$stop, 1, 0)
tmp <- tmp |> subset(party %in% c("MMD")) |>
  group_by(row_id, party) |> summarise(flag = max(flag))
tmp <- tmp |> pivot_wider(names_from = "party", values_from = flag, names_prefix = "rkd_flag_pres_2011_", values_fill = 0)
mmd_pres_2011_fraud <- tmp
mmd_pres_2011_fraud <- left_join(mmd_pres_2011_fraud, pres_2011[, c("row_id", "constituency_name")])
mmd_pres_2011_fraud <- full_join(mmd_pres_2011_fraud, unique(pres_2011["constituency_name"]))


pf_parl_fraud <- tibble(year = 2011,
                        party_candidate = "pf",
                        "ranges" = gsub("\\]", "", gsub("\\(", "", names(get_fraudulent_result(pf_parl_2011_spikes)[[2]]) ))) |>
  separate_wider_delim(cols = ranges, delim = ",", names = c("start", "stop"))
parl_2011$row_id <- 1:nrow(parl_2011)
tmp <- left_join(parl_2011[, c("row_id", "year", "party_candidate", "votes", "sum_of_votes")],
                 pf_parl_fraud)
tmp$pct_votes <- tmp$votes/tmp$sum_of_votes
tmp$flag <- ifelse(tmp$pct_votes >= tmp$start & tmp$pct_votes<=tmp$stop, 1, 0)
tmp <- tmp |> subset(party_candidate %in% c("pf")) |>
  group_by(row_id, party_candidate) |> summarise(flag = max(flag))
tmp <- tmp |> pivot_wider(names_from = "party_candidate", values_from = flag, names_prefix = "rkd_flag_parl_2011_", values_fill = 0)
pf_parl_2011_fraud <- tmp
rm(tmp)


pf_parl_2011_fraud <- left_join(pf_parl_2011_fraud,
                                parl_2011[, c("row_id", "constituency_candidate")])
pf_parl_2011_fraud <- full_join(pf_parl_2011_fraud, unique(parl_2011["constituency_candidate"]))
pf_parl_2011_fraud$rkd_flag_parl_2011_pf[which(is.na(pf_parl_2011_fraud$rkd_flag_parl_2011_pf))] <- 0

upnd_parl_fraud <- tibble(year = 2021,
                          party_candidate = "upnd",
                          "ranges" = gsub("\\]", "", gsub("\\(", "", names(get_fraudulent_result(upnd_parl_2021_spikes)[[2]]) ))) |>
  separate_wider_delim(cols = ranges, delim = ",", names = c("start", "stop"))
parl_2021$row_id <- 1:nrow(parl_2021)
tmp <- left_join(parl_2021[, c("row_id", "year", "party_candidate", "votes", "sum_of_votes")],
                 upnd_parl_fraud)
tmp$pct_votes <- tmp$votes/tmp$sum_of_votes
tmp$flag <- ifelse(tmp$pct_votes >= tmp$start & tmp$pct_votes<=tmp$stop, 1, 0)
tmp <- tmp |> subset(party_candidate %in% c("upnd")) |>
  group_by(row_id, party_candidate) |> summarise(flag = max(flag))
tmp <- tmp |> pivot_wider(names_from = "party_candidate", values_from = flag,
                          names_prefix = "rkd_flag_parl_2021_", values_fill = 0)
upnd_parl_2021_fraud <- tmp
upnd_parl_2021_fraud <- left_join(upnd_parl_2021_fraud, parl_2021[, c("row_id", "constituency_candidate")])
upnd_parl_2021_fraud <- full_join(upnd_parl_2021_fraud, unique(parl_2021["constituency_candidate"]))
upnd_parl_2021_fraud$rkd_flag_parl_2021_upnd[which(is.na(upnd_parl_2021_fraud$rkd_flag_parl_2021_upnd))] <- 0


upnd_parl_2021_fraud  <- upnd_parl_2021_fraud |> ungroup() |> rename(constituency_name = constituency_candidate) |>  select(-row_id)
pf_parl_2011_fraud    <- pf_parl_2011_fraud   |> ungroup() |> rename(constituency_name = constituency_candidate) |>  select(-row_id)
mmd_pres_2011_fraud   <- mmd_pres_2011_fraud  |> ungroup() |> mutate(constituency_name = tolower(constituency_name)) |>  select(-row_id)

constituency_rkd_fraud <- full_join(upnd_parl_2021_fraud, pf_parl_2011_fraud)
constituency_rkd_fraud <- full_join(constituency_rkd_fraud, mmd_pres_2011_fraud)

constituency_rkd_fraud <- constituency_rkd_fraud |> replace_na(replace = list(rkd_flag_parl_2021_upnd = 0, 
                                                                              rkd_flag_pres_2011_MMD = 0, 
                                                                              rkd_flag_parl_2011_pf = 0))
colnames(constituency_rkd_fraud) <- tolower(colnames(constituency_rkd_fraud))

## Alter constituency names
constituency_rkd_fraud$constituency_name[which(constituency_rkd_fraud$constituency_name=="kapoche")] <-"kaumbwe"
rkd_constituencies$constituency_name[which(rkd_constituencies$constituency_name=="itezhi-tezhi")] <- "itezhi tezhi"

##    Alter colnames
rkd_constituencies <- rkd_constituencies |> rename(polling_station_2021_rkd_flag_any = rkd_flag_any,
                                                   polling_station_2021_rkd_flag_pf = rkd_flag_PF,
                                                   polling_station_2021_rkd_flag_upnd = rkd_flag_UPND)

constituency_rkd_fraud <- constituency_rkd_fraud |> rename(constituency_parliamentary_2011_rkd_flag_pf = rkd_flag_parl_2011_pf,
                                                           constituency_presidential_2011_rkd_flag_mmd = rkd_flag_pres_2011_mmd,
                                                           constituency_parliamentary_2021_rkd_flag_upnd = rkd_flag_parl_2021_upnd)



fraud_red_flags <- full_join(rkd_constituencies, constituency_rkd_fraud)
fraud_red_flags$constituency_any_rkd_flag_2011 <- ifelse((fraud_red_flags$constituency_presidential_2011_rkd_flag_mmd + fraud_red_flags$constituency_parliamentary_2011_rkd_flag_pf) > 0, 1, 0)
fraud_red_flags$any_flag_2021 <- ifelse(is.na(fraud_red_flags$polling_station_2021_rkd_flag_any)==FALSE & (fraud_red_flags$polling_station_2021_rkd_flag_any + fraud_red_flags$constituency_parliamentary_2021_rkd_flag_upnd) > 0, 1,
                                        ifelse(is.na(fraud_red_flags$polling_station_2021_rkd_flag_any)==TRUE, fraud_red_flags$constituency_parliamentary_2021_rkd_flag_upnd,
                                               0))
fraud_red_flags


# Co-occurence ------------------------------------------------------------
rkd$turnout <- rkd$sum_of_votes/rkd$voters
rkd$pct_votes <- rkd$votes/rkd$sum_of_votes

turnout_votepct <- rkd |> subset(party %in% c("PF", "UPND") & turnout <= 1) |>
  ggplot(aes(x = turnout, y = pct_votes)) +
  facet_wrap( ~ party) +
  geom_point() +
  geom_smooth(method = "loess") +
  scale_y_continuous(labels = scales::percent) +
  scale_x_continuous(labels = scales::percent) +
  theme_classic() +
  xlab("Turnout") + ylab("Percent votes for party") +
  theme(strip.background = element_rect(fill = "grey80"))
ggsave(turnout_votepct, filename = "FigureB15.jpg",
       width = 12, height = 12, scale = 0.6)



## Constituency ------------------------------------------------------------
cooccurence_constituency_pres <-  pres |> ungroup() |> subset(party %in%  c("PF", "UPND")) |> 
  ggplot(aes(x = turnout, y = pct_sum_of_votes)) +
  facet_grid(election_year ~ party) +
  geom_point() +
  geom_smooth(method = "loess") +
  scale_y_continuous(labels = scales::percent, limits = c(0, 1)) +
  scale_x_continuous(labels = scales::percent, limits = c(0.2, 1)) +
  theme_classic() +
  xlab("Turnout") + ylab("Percent votes for party") +
  labs(title = "Presidential elections") +
  theme(strip.background = element_rect(fill = "grey80"),
        panel.border = element_rect(colour = "black", fill=NA, linewidth=2))
cooccurence_constituency_pres
ggsave(cooccurence_constituency_pres, filename = "FigureB16.jpg",
       width = 10, height = 12, scale = 0.6)



parl$party <- toupper(parl$party_candidate)
cooccurence_constituency_parl <-  parl |> ungroup() |> subset(party %in%  c("PF", "UPND") & year>=2011) |> 
  ggplot(aes(x = turnout, y = pct_sum_of_votes)) +
  facet_grid(year ~ party) +
  geom_point() +
  geom_smooth(method = "loess") +
  scale_y_continuous(labels = scales::percent, limits = c(0, 1)) +
  scale_x_continuous(labels = scales::percent, limits = c(0.2, 1)) +
  theme_classic() +
  xlab("Turnout") + ylab("Percent votes for party") +
  labs(title = "Parliamentary elections") +
  theme(strip.background = element_rect(fill = "grey80"),
        panel.border = element_rect(colour = "black", fill=NA, linewidth=2))
cooccurence_constituency_parl
ggsave(cooccurence_constituency_parl, filename = "FigureB17.jpg",
       width = 10, height = 12, scale = 0.6)



